In [1]:
import numpy as np
import pandas as pd
In [2]:
df = df=pd.read_excel('Downloads/divorce/divorce.xlsx')
In [3]:
df.head()
Out[3]:
Atr1 Atr2 Atr3 Atr4 Atr5 Atr6 Atr7 Atr8 Atr9 Atr10 ... Atr46 Atr47 Atr48 Atr49 Atr50 Atr51 Atr52 Atr53 Atr54 Class
0 2 2 4 1 0 0 0 0 0 0 ... 2 1 3 3 3 2 3 2 1 1
1 4 4 4 4 4 0 0 4 4 4 ... 2 2 3 4 4 4 4 2 2 1
2 2 2 2 2 1 3 2 1 1 2 ... 3 2 3 1 1 1 2 2 2 1
3 3 2 3 2 3 3 3 3 3 3 ... 2 2 3 3 3 3 2 2 2 1
4 2 2 1 1 1 1 0 0 0 0 ... 2 1 2 3 2 2 2 1 0 1

5 rows × 55 columns

In [4]:
#(0=Never, 1=Seldom, 2=Averagely, 3=Frequently, 4=Always).
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 170 entries, 0 to 169
Data columns (total 55 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Atr1    170 non-null    int64
 1   Atr2    170 non-null    int64
 2   Atr3    170 non-null    int64
 3   Atr4    170 non-null    int64
 4   Atr5    170 non-null    int64
 5   Atr6    170 non-null    int64
 6   Atr7    170 non-null    int64
 7   Atr8    170 non-null    int64
 8   Atr9    170 non-null    int64
 9   Atr10   170 non-null    int64
 10  Atr11   170 non-null    int64
 11  Atr12   170 non-null    int64
 12  Atr13   170 non-null    int64
 13  Atr14   170 non-null    int64
 14  Atr15   170 non-null    int64
 15  Atr16   170 non-null    int64
 16  Atr17   170 non-null    int64
 17  Atr18   170 non-null    int64
 18  Atr19   170 non-null    int64
 19  Atr20   170 non-null    int64
 20  Atr21   170 non-null    int64
 21  Atr22   170 non-null    int64
 22  Atr23   170 non-null    int64
 23  Atr24   170 non-null    int64
 24  Atr25   170 non-null    int64
 25  Atr26   170 non-null    int64
 26  Atr27   170 non-null    int64
 27  Atr28   170 non-null    int64
 28  Atr29   170 non-null    int64
 29  Atr30   170 non-null    int64
 30  Atr31   170 non-null    int64
 31  Atr32   170 non-null    int64
 32  Atr33   170 non-null    int64
 33  Atr34   170 non-null    int64
 34  Atr35   170 non-null    int64
 35  Atr36   170 non-null    int64
 36  Atr37   170 non-null    int64
 37  Atr38   170 non-null    int64
 38  Atr39   170 non-null    int64
 39  Atr40   170 non-null    int64
 40  Atr41   170 non-null    int64
 41  Atr42   170 non-null    int64
 42  Atr43   170 non-null    int64
 43  Atr44   170 non-null    int64
 44  Atr45   170 non-null    int64
 45  Atr46   170 non-null    int64
 46  Atr47   170 non-null    int64
 47  Atr48   170 non-null    int64
 48  Atr49   170 non-null    int64
 49  Atr50   170 non-null    int64
 50  Atr51   170 non-null    int64
 51  Atr52   170 non-null    int64
 52  Atr53   170 non-null    int64
 53  Atr54   170 non-null    int64
 54  Class   170 non-null    int64
dtypes: int64(55)
memory usage: 73.2 KB
In [5]:
import seaborn as sns

sns.pairplot(df, hue='Class')
Out[5]:
<seaborn.axisgrid.PairGrid at 0x7f82180933a0>
In [38]:
df.Class.value_counts().plot(kind='bar')
Out[38]:
<AxesSubplot:>
In [6]:
y=df.loc[:,"Class"]
x=df.drop(["Class"],axis=1)
In [7]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.33,random_state=0)
In [ ]:
from sklearn.preprocessing import StandardScaler #feature scalling (normalization) always scale trainning before test
sc=StandardScaler()
trainning_scaled=sc.fit_transform(X_train)
test_scaled=sc.transform(X_test)
In [9]:
from sklearn.svm import SVC
svmmodel=SVC(kernel='poly', random_state=0)
svmmodel.fit(X_train,y_train)
Out[9]:
SVC(kernel='poly', random_state=0)
In [10]:
y_predict_train=svmmodel.predict(X_train)
y_predict_test=svmmodel.predict(X_test)
from sklearn.metrics import accuracy_score
print("trainning acc= ",accuracy_score(y_train,y_predict_train))
print("test acc= ",accuracy_score(y_test,y_predict_test))
trainning acc=  0.9823008849557522
test acc=  0.9649122807017544
In [12]:
from sklearn.metrics import confusion_matrix
cn=confusion_matrix(y_test,y_predict_test)
print(cn)
[[29  0]
 [ 2 26]]
In [13]:
from sklearn.metrics import f1_score 
f1_score(y_test,y_predict_test)
Out[13]:
0.962962962962963
In [14]:
from sklearn.tree import DecisionTreeClassifier
DTC = DecisionTreeClassifier()
DTC.fit(X_train, y_train)
Out[14]:
DecisionTreeClassifier()
In [18]:
y_predict_train=DTC.predict(X_train)
y_predict_test=DTC.predict(X_test)
from sklearn.metrics import accuracy_score
print("trainning acc= ",accuracy_score(y_train,y_predict_train))
print("test acc= ",accuracy_score(y_test,y_predict_test))
trainning acc=  1.0
test acc=  0.9473684210526315
In [19]:
from sklearn.metrics import confusion_matrix
cn=confusion_matrix(y_test,y_predict_test)
print(cn)
[[28  1]
 [ 2 26]]
In [20]:
from sklearn.metrics import f1_score 
f1_score(y_test,y_predict_test)
Out[20]:
0.9454545454545454
In [21]:
from sklearn.ensemble import RandomForestClassifier
RF=RandomForestClassifier()
RF.fit(X_train,y_train)
Out[21]:
RandomForestClassifier()
In [22]:
y_predict_train=RF.predict(X_train)
y_predict_test=RF.predict(X_test)
from sklearn.metrics import accuracy_score
print("trainning acc= ",accuracy_score(y_train,y_predict_train))
print("test acc= ",accuracy_score(y_test,y_predict_test))
trainning acc=  1.0
test acc=  0.9649122807017544
In [23]:
from sklearn.metrics import confusion_matrix
cn=confusion_matrix(y_test,y_predict_test)
print(cn)
[[29  0]
 [ 2 26]]
In [24]:
from sklearn.metrics import f1_score 
f1_score(y_test,y_predict_test)
Out[24]:
0.962962962962963
In [26]:
from sklearn.neighbors import KNeighborsClassifier
KNN = KNeighborsClassifier(n_neighbors=3)
KNN.fit(X_train,y_train)
Out[26]:
KNeighborsClassifier(n_neighbors=3)
In [27]:
y_predict_train=KNN.predict(X_train)
y_predict_test=KNN.predict(X_test)
from sklearn.metrics import accuracy_score
print("trainning acc= ",accuracy_score(y_train,y_predict_train))
print("test acc= ",accuracy_score(y_test,y_predict_test))
trainning acc=  0.9823008849557522
test acc=  0.9649122807017544
In [28]:
from sklearn.metrics import confusion_matrix
cn=confusion_matrix(y_test,y_predict_test)
print(cn)
[[29  0]
 [ 2 26]]
In [29]:
from sklearn.metrics import f1_score 
f1_score(y_test,y_predict_test)
Out[29]:
0.962962962962963
In [30]:
from sklearn.naive_bayes import GaussianNB
NB = GaussianNB()
NB.fit(X_train,y_train)
Out[30]:
GaussianNB()
In [31]:
y_predict_train=NB.predict(X_train)
y_predict_test=NB.predict(X_test)
from sklearn.metrics import accuracy_score
print("trainning acc= ",accuracy_score(y_train,y_predict_train))
print("test acc= ",accuracy_score(y_test,y_predict_test))
trainning acc=  0.9911504424778761
test acc=  0.9473684210526315
In [32]:
from sklearn.metrics import confusion_matrix
cn=confusion_matrix(y_test,y_predict_test)
print(cn)
[[28  1]
 [ 2 26]]
In [33]:
from sklearn.metrics import f1_score 
f1_score(y_test,y_predict_test)
Out[33]:
0.9454545454545454
In [34]:
from sklearn.linear_model import LogisticRegression
LG = LogisticRegression(random_state=0)
LG.fit(X_train,y_train)
Out[34]:
LogisticRegression(random_state=0)
In [35]:
y_predict_train=LG.predict(X_train)
y_predict_test=LG.predict(X_test)
from sklearn.metrics import accuracy_score
print("trainning acc= ",accuracy_score(y_train,y_predict_train))
print("test acc= ",accuracy_score(y_test,y_predict_test))
trainning acc=  1.0
test acc=  0.9649122807017544
In [36]:
from sklearn.metrics import confusion_matrix
cn=confusion_matrix(y_test,y_predict_test)
print(cn)
[[29  0]
 [ 2 26]]
In [37]:
from sklearn.metrics import f1_score 
f1_score(y_test,y_predict_test)
Out[37]:
0.962962962962963
In [ ]: